/*
 * Copyright 2004-2005 The Apache Software Foundation or its licensors,
 *                     as applicable.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.xml;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.jcr.ItemVisitor;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.PathNotFoundException;
import javax.jcr.Property;
import javax.jcr.PropertyIterator;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.Value;
import javax.jcr.ValueFormatException;

import org.apache.commons.codec.binary.Base64;
import org.apache.jackrabbit.name.QName;
import org.apache.xerces.util.XMLChar;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Generic document view exporter for JCR content repositories.
 * This class can be used to implement the XML document view export
 * operations using nothing but the standard JCR interfaces. The
 * export operation is implemented as an ItemVisitor that generates
 * the document view SAX event stream as it traverses the selected
 * JCR content tree.
 *
 * <h2>Implementing a customized XML serializer</h2>
 * <p>
 * A client can extend this class to provide customized XML serialization
 * formats. By overriding the protected includeProperty() and includeNode()
 * methods, a subclass can select which properties and nodes will be included
 * in the serialized XML stream.
 * <p>
 * For example, the following code implements an XML serialization that only
 * contains the titles of the first two levels of the node tree.
 * <pre>
 *     ContentHandler handler = ...;
 *     final Node parent = ...;
 *     parent.accept(
 *         new DocumentViewExportVisitor(handler, true, false) {
 *
 *             protected boolean includeProperty(Property property)
 *                     throws RepositoryException {
 *                 return property.getName().equals("title");
 *             }
 *
 *             protected boolean includeNode(Node node)
 *                     throws RepositoryException {
 *                 return (node.getDepth() <= parent.getDepth() + 2);
 *             }
 *
 *         });
 * </pre>
 *
 * <h2>Implementing the standard export methods</h2>
 * <p>
 * The following is an example of the
 * Session.exportDocumentView(String, ContentHandler, boolean, boolean)
 * method implemented in terms of this exporter class:
 * <pre>
 *     public void exportDocumentView(
 *             String absPath, ContentHandler handler,
 *             boolean skipBinary, boolean noRecurse)
 *             throws PathNotFoundException, SAXException, RepositoryException {
 *         Item item = getItem(absPath);
 *         if (item.isNode()) {
 *             item.accept(new DocumentViewExportVisitor(
 *                     handler, skipBinary, noRecurse));
 *         } else {
 *             throw new PathNotFoundException("Invalid node path: " + absPath);
 *         }
 *     }
 * </pre>
 * <p>
 * The companion method
 * Session.exportDocumentView(String, OutputStream, boolean, boolean)
 * can be implemented in terms of the above method:
 * <pre>
 *     public void exportDocumentView(
 *             String absPath, OutputStream output,
 *             boolean skipBinary, boolean noRecurse)
 *             throws PathNotFoundException, IOException, RepositoryException {
 *         try {
 *             SAXTransformerFactory factory = (SAXTransformerFactory)
 *                 SAXTransformerFactory.newInstance();
 *             TransformerHandler handler = factory.newTransformerHandler();
 *             handler.setResult(new StreamResult(output));
 *             exportDocumentView(absPath, handler, skipBinary, noRecurse);
 *         } catch (TransformerConfigurationException e) {
 *             throw new IOException(
 *                     "Unable to configure a SAX transformer: " + e.getMessage());
 *         } catch (SAXException e) {
 *             throw new IOException(
 *                     "Unable to serialize a SAX stream: " + e.getMessage());
 *         }
 *     }
 * </pre>
 *
 * @see ItemVisitor
 * @see Session#exportDocumentView(String, ContentHandler, boolean, boolean)
 * @see Session#exportDocumentView(String, java.io.OutputStream, boolean, boolean)
 */
public class DocumentViewExportVisitor implements ItemVisitor {

    /**
     * Matches an underscore that begins a literal <code>_xHHHH_</code>
     * sequence. Such an underscore must itself be escaped so that the
     * literal text is not mistaken for an escape sequence when the
     * document is read back. A lookahead is used so that the trailing
     * underscore of one sequence can also be the leading underscore of
     * the next one (for example <code>_x0041_x0042_</code>).
     */
    private static final Pattern ESCAPED_UNDERSCORE =
        Pattern.compile("_(?=x[0-9a-fA-F]{4}_)");

    /**
     * The escape sequence that represents a literal underscore.
     */
    private static final String UNDERSCORE_ESCAPE = "_x005f_";

    /**
     * The SAX content handler for the serialized XML stream.
     */
    private final ContentHandler handler;

    /**
     * Flag to skip all binary properties.
     */
    private final boolean skipBinary;

    /**
     * Flag to only serialize the selected node.
     */
    private final boolean noRecurse;

    /**
     * The root node of the serialization tree. This is the node that
     * is mapped to the root element of the serialized XML stream.
     * The field is <code>null</code> whenever no export is in progress.
     */
    private Node root;

    /**
     * The current session. Only set while an export is in progress.
     */
    private Session session;

    /**
     * The prefix mapped to the <code>http://www.jcp.org/jcr/1.0</code>
     * namespace in the current session. Only set while an export is
     * in progress.
     */
    private String jcr;

    /**
     * Creates a visitor for exporting content using the document view
     * format. To actually perform the export operation, you need to pass
     * the visitor instance to the selected content node using the
     * Node.accept(ItemVisitor) method.
     *
     * @param handler the SAX event handler
     * @param skipBinary flag for ignoring binary properties
     * @param noRecurse flag for not exporting an entire content subtree
     */
    public DocumentViewExportVisitor(
            ContentHandler handler, boolean skipBinary, boolean noRecurse) {
        this.handler = handler;
        this.skipBinary = skipBinary;
        this.noRecurse = noRecurse;
        this.root = null;
    }

    /**
     * Ignored. Properties are included as attributes of node elements.
     *
     * @param property ignored property
     * @see ItemVisitor#visit(Property)
     */
    public final void visit(Property property) {
    }

    /**
     * Exports the visited node using the document view serialization format.
     * This method is the main entry point to the serialization mechanism.
     * It manages the opening and closing of the SAX event stream and the
     * registration of the namespace mappings. The process of actually
     * generating the document view SAX events is spread into various
     * private methods, and can be controlled by overriding the protected
     * includeProperty() and includeNode() methods.
     * <p>
     * The first node passed to this method becomes the root of the
     * exported document; nodes visited recursively while that call is
     * active are exported as descendants. Once the root call returns
     * (normally or with an exception) the internal state is cleared, so
     * the same visitor instance can be used for another export.
     *
     * @param node the node to visit
     * @throws RepositoryException on repository errors, or wrapping any
     *                             I/O or SAX error raised during the export
     * @see ItemVisitor#visit(Node)
     * @see #includeProperty(Property)
     * @see #includeNode(Node)
     */
    public final void visit(Node node) throws RepositoryException {
        // The outermost invocation is the one that finds no root set
        boolean isRoot = (root == null);
        try {
            // start document
            if (isRoot) {
                root = node;
                session = node.getSession();
                jcr = session.getNamespacePrefix(QName.NS_JCR_URI);
                handler.startDocument();

                String[] prefixes = session.getNamespacePrefixes();
                for (int i = 0; i < prefixes.length; i++) {
                    String uri = session.getNamespaceURI(prefixes[i]);
                    // the xml namespace is implicitly declared in XML
                    if (!uri.equals(QName.NS_XML_URI)) {
                        handler.startPrefixMapping(prefixes[i], uri);
                    }
                }
            }

            // export current node
            String name = node.getName();
            if (!name.equals(jcr + ":xmltext")) {
                int colon = name.indexOf(':');
                if (colon != -1) {
                    String prefix = name.substring(0, colon);
                    name = name.substring(colon + 1);
                    exportNode(node, prefix, escapeName(name));
                } else if (name.length() > 0) {
                    exportNode(node, "", escapeName(name));
                } else {
                    // a node with an empty name is exported as <jcr:root>
                    exportNode(node, jcr, "root");
                }
            } else if (!isRoot) {
                exportText(node);
            } else {
                // character data alone cannot form an XML document
                throw new RepositoryException("Cannot export jcr:xmltext");
            }

            // end document
            if (isRoot) {
                String[] prefixes = session.getNamespacePrefixes();
                for (int i = 0; i < prefixes.length; i++) {
                    String uri = session.getNamespaceURI(prefixes[i]);
                    if (!uri.equals(QName.NS_XML_URI)) {
                        handler.endPrefixMapping(prefixes[i]);
                    }
                }
                handler.endDocument();
            }
        } catch (IOException e) {
            throw new RepositoryException(e);
        } catch (SAXException e) {
            throw new RepositoryException(e);
        } finally {
            // Clear the per-export state so that a later accept() call
            // starts a fresh document instead of continuing this one
            if (isRoot) {
                root = null;
                session = null;
                jcr = null;
            }
        }
    }

    /**
     * Checks whether the given property should be included in the XML
     * serialization. This method returns <code>true</code> by default,
     * but subclasses can override this method to implement more selective
     * XML serialization.
     *
     * @param property the property to check
     * @return true if the property should be included, false otherwise
     * @throws RepositoryException on repository errors
     */
    protected boolean includeProperty(Property property)
            throws RepositoryException {
        return true;
    }

    /**
     * Checks whether the given node should be included in the XML
     * serialization. This method returns <code>true</code> by default,
     * but subclasses can override this method to implement selective
     * XML serialization.
     * <p>
     * Note that this method is only called for the descendants of the
     * root node of the serialized tree. Also, this method is never called
     * if the noRecurse flag is set because no descendant nodes will be
     * serialized anyway.
     *
     * @param node the node to check
     * @return true if the node should be included, false otherwise
     * @throws RepositoryException on repository errors
     */
    protected boolean includeNode(Node node) throws RepositoryException {
        return true;
    }

    /**
     * Serializes a special "jcr:xmltext" node. Only the contents of the
     * "jcr:xmlcharacters" property will be written as characters to the
     * XML stream and no elements or attributes will be generated for
     * this node or any other child nodes or properties.
     *
     * @param node the "jcr:xmltext" node
     * @throws SAXException on SAX errors
     * @throws RepositoryException on repository errors
     */
    private void exportText(Node node)
            throws SAXException, RepositoryException {
        try {
            Property property = node.getProperty(jcr + ":xmlcharacters");
            char[] characters = filterXML(property.getString());
            handler.characters(characters, 0, characters.length);
        } catch (PathNotFoundException ex) {
            // ignore empty jcr:xmltext nodes
        } catch (ValueFormatException ex) {
            // ignore non-string jcr:xmlcharacters properties
        }
    }

    /**
     * Serializes the given node to the XML stream. Generates an element
     * with the given name, and maps node properties to attributes of the
     * generated element. If the noRecurse flag is false, then child nodes
     * are serialized as sub-elements.
     *
     * @param node the given node
     * @param prefix namespace prefix, or the empty string for none
     * @param name escaped local name
     * @throws IOException if a problem with binary values occurred
     * @throws SAXException on SAX errors
     * @throws RepositoryException on repository errors
     */
    private void exportNode(Node node, String prefix, String name)
            throws IOException, SAXException, RepositoryException {
        // Set up element name components
        String prefixedName = name;
        if (prefix.length() > 0) {
            prefixedName = prefix + ":" + name;
        }
        // The URI is passed on unchanged: the SAX ContentHandler contract
        // uses the empty string, not null, for "no namespace"
        String uri = session.getNamespaceURI(prefix);

        // Start element
        handler.startElement(uri, name, prefixedName, getAttributes(node));

        // Visit child nodes (unless denied by the noRecurse flag)
        if (!noRecurse) {
            NodeIterator children = node.getNodes();
            while (children.hasNext()) {
                Node child = children.nextNode();
                if (includeNode(child)) {
                    child.accept(this);
                }
            }
        }

        // End element
        handler.endElement(uri, name, prefixedName);
    }

    /**
     * Returns the document view attributes of the given Node. The
     * properties of the node are mapped to XML attributes directly as
     * name-value pairs. Binary properties are left out when the
     * skipBinary flag is set, as are properties rejected by
     * {@link #includeProperty(Property)}.
     *
     * @param node the given node
     * @return document view attributes of the node
     * @throws IOException if a problem with binary values occurred
     * @throws RepositoryException on repository errors
     */
    private Attributes getAttributes(Node node)
            throws IOException, RepositoryException {
        AttributesImpl attributes = new AttributesImpl();

        PropertyIterator properties = node.getProperties();
        while (properties.hasNext()) {
            Property property = properties.nextProperty();
            if (!(skipBinary && property.getType() == PropertyType.BINARY)
                    && includeProperty(property)) {
                String name = property.getName();
                String value = escapeValue(property);

                String prefixedName;
                String uri;
                int colon = name.indexOf(':');
                if (colon != -1) {
                    String prefix = name.substring(0, colon);
                    uri = session.getNamespaceURI(prefix);
                    name = escapeName(name.substring(colon + 1));
                    prefixedName = prefix + ":" + name;
                } else {
                    uri = session.getNamespaceURI("");
                    name = escapeName(name);
                    prefixedName = name;
                }
                attributes.addAttribute(uri, name, prefixedName, "CDATA", value);
            }
        }

        return attributes;
    }

    /**
     * Converts the given string to a character array in which every
     * character that Xerces reports as invalid in XML has been replaced
     * with a space.
     *
     * @param value original string
     * @return filtered characters
     */
    private static char[] filterXML(String value) {
        char[] characters = value.toCharArray();
        for (int i = 0; i < characters.length; i++) {
            if (XMLChar.isInvalid(characters[i])) {
                characters[i] = ' '; // TODO: What's the correct escape?
            }
        }
        return characters;
    }

    /**
     * Escapes the given JCR name according to the rules of section
     * 6.4.3 of the JSR 170 specification. Characters that are valid at
     * their position in an XML NCName are copied as is; every other
     * character is replaced with an <code>_xHHHH_</code> sequence. An
     * underscore that starts a literal <code>_xHHHH_</code> sequence is
     * escaped as <code>_x005f_</code>.
     * <p>
     * Note that characters that are invalid in XML are first replaced
     * with spaces by {@link #filterXML(String)} and therefore end up
     * as <code>_x0020_</code>.
     *
     * @param name JCR name
     * @return escaped name
     */
    private static String escapeName(String name) {
        StringBuffer buffer = new StringBuffer();

        // Copy a possible prefix as is and only escape the local part
        int colon = name.indexOf(':');
        if (colon != -1) {
            buffer.append(name.substring(0, colon + 1));
            name = name.substring(colon + 1);
        }

        Matcher matcher = ESCAPED_UNDERSCORE.matcher(name);
        char[] characters = filterXML(matcher.replaceAll(UNDERSCORE_ESCAPE));

        for (int i = 0; i < characters.length; i++) {
            char ch = characters[i];
            // The first character of a name has a stricter rule
            boolean valid =
                (i == 0) ? XMLChar.isNCNameStart(ch) : XMLChar.isNCName(ch);
            if (valid) {
                buffer.append(ch);
            } else {
                // Encode as _xHHHH_ with the code zero-padded to 4 digits
                String hex = Integer.toHexString((int) ch);
                buffer.append("_x");
                for (int j = 4; j > hex.length(); j--) {
                    buffer.append('0');
                }
                buffer.append(hex);
                buffer.append('_');
            }
        }

        return buffer.toString();
    }

    /**
     * Returns the string representation of the given value. Binary values
     * are encoded in Base64, while other values are just converted to their
     * string format. When escaping is requested, whitespace characters and
     * literal <code>_xHHHH_</code> sequences of non-binary values are
     * escaped so that the value can be used as an item of a
     * space-separated list.
     *
     * @param value original value
     * @param escape whether to apply value escapes
     * @return escaped value
     * @throws IOException if a problem with binary values occurred
     * @throws RepositoryException on repository errors
     */
    private static String escapeValue(Value value, boolean escape)
            throws IOException, RepositoryException {
        if (value.getType() == PropertyType.BINARY) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            InputStream input = value.getStream();
            try {
                byte[] bytes = new byte[4096];
                for (int n = input.read(bytes); n != -1; n = input.read(bytes)) {
                    buffer.write(bytes, 0, n);
                }
            } finally {
                input.close();
            }
            return new String(Base64.encodeBase64(buffer.toByteArray()), "ASCII");
        } else if (escape) {
            StringBuffer buffer = new StringBuffer();
            Matcher matcher = ESCAPED_UNDERSCORE.matcher(value.getString());
            char[] characters = filterXML(matcher.replaceAll(UNDERSCORE_ESCAPE));
            for (int i = 0; i < characters.length; i++) {
                if (characters[i] == ' ') {
                    buffer.append("_x0020_");
                } else if (characters[i] == '\t') {
                    buffer.append("_x0009_");
                } else if (characters[i] == '\r') {
                    buffer.append("_x000D_");
                } else if (characters[i] == '\n') {
                    buffer.append("_x000A_");
                } else {
                    buffer.append(characters[i]);
                }
            }
            return buffer.toString();
        } else {
            return new String(filterXML(value.getString()));
        }
    }

    /**
     * Returns the document view representation of the given property.
     * Multiple values are combined into a space-separated list of
     * space-escaped string values, binary values are encoded using the
     * Base64 encoding, and other values are simply returned using their
     * default string representation.
     *
     * @param property the given property
     * @return document view representation of the property value
     * @throws IOException if a problem with binary values occurred
     * @throws RepositoryException on repository errors
     */
    private static String escapeValue(Property property)
            throws IOException, RepositoryException {
        if (property.getDefinition().isMultiple()) {
            StringBuffer buffer = new StringBuffer();
            Value[] values = property.getValues();
            for (int i = 0; i < values.length; i++) {
                if (i > 0) {
                    buffer.append(' ');
                }
                buffer.append(escapeValue(values[i], true));
            }
            return buffer.toString();
        } else {
            return escapeValue(property.getValue(), false);
        }
    }

}
